/****************************************************************
*****************************************************************/

*by Xiaodong Fan, fanxiaodong@gmail.com

#delimit ;
/* From this point on a command ends at a semicolon instead of at the end of
   the line. Star comments therefore also need a closing semicolon. */

/* Start from a clean session: close any open log (errors are captured, so
   this is silent when no log is open) and empty memory. */
cap log close;
clear all;
drop _all;
/* NOTE(review): "set more 1" is a legacy numeric form of the command. It
   presumably switches the more pause off so the script runs unattended.
   TODO confirm on the Stata version in use. */
set more 1;
/* Allow the pause command to stop execution. The only pause in this part of
   the file is commented out. */
pause on;

/* Everything displayed from here on is written to this log file, which is
   overwritten on every run. */
log using log_MSM_cf_profile_infile.log, replace;

/* Directory for the output datasets: the current working directory. */
global gdtadir ".";

/* Runs do_globals.do in the scope of this file. NOTE(review): it presumably
   defines the globals cwd_indiv and gvdata0 and the programs
   prog_MSM_indiv_infile and prog_MSM_moments_infile used further down, none
   of which are defined in this file. TODO confirm. */
include do_globals.do;



/* Variable lists shared by the stages below. The order of the names inside
   each list is kept unchanged because it determines the variable order of
   the collapsed and reshaped datasets. */

/* Budget items: summed within id, then averaged across id for every
   counterfactual. */
local lvbudgeta b_output b_sstax b_medicaretax
                b_finctax b_ssb b_ssdiinc
                wage;

/* Profile variables: collapsed to their mean by t. */
local lvprofiles A H AIME C I I_cond L labor
                 lfpr lfpr_eq lfpr_ft lfpr_pt olabor
                 inc lnw wage
                 dlnw dlnw_full dlnw_base dlnw_full_base
                 irecss ssa EV;

/* Differences of lfpr and lnw between adjacent ihealth groups, created after
   the collapse. */
local lvprofiles_diff lfpr_diff_E2G lfpr_diff_G2B lfpr_diff_B2D
                      lnw_diff_E2G  lnw_diff_G2B  lnw_diff_B2D;

/* Medians by t, created by the collapse. */
local lvprofiles_p50 p50A p50H p50C p50I p50I_cond
                     p50labor p50inc p50lnw;

/******************************************************
  life-cycle profiles. baseline and counterfactuals
******************************************************/
/* Counterfactual ids 10 11 12 are processed only when the path of the working
   directory contains the text pthealth. */
if (strpos("`c(pwd)'", "pthealth") > 0) {;
    local lcfpth 10 11 12;
};
else {;
    local lcfpth "";
};

/* lvjvc    0 during the first pass of the loop and 1 afterwards. On the first
            pass every output file is created from scratch, replacing whatever
            an earlier run left behind. On later passes the current
            counterfactual is appended to the file written so far.
   lreadtxt 1 = read the individual-level text output and rebuild
            dta_MSM_Indiv_withCF.dta, 0 = reuse that file from an earlier run. */
local lvjvc = 0;
local lreadtxt = 1;

/* One pass per counterfactual id. Id 0 is the baseline, whose input files
   carry no suffix. Every other id k reads the files with suffix _CFk.
   Files written, one block of rows per id (variable cfid):
     dta_MSM_Indiv_withCF.dta       individual-level data
     dta_MSM_cf_Profiles_retage.dta distribution of the last period worked
     dta_MSM_cf_budget.dta          average lifetime budget items
     dta_MSM_cf_Profiles.dta        profiles by t
     dta_MSM_cf_Moments.dta         moments read by prog_MSM_moments_infile */
foreach jv in 0 1 2 3 4 5 6 7 8 9 `lcfpth'
              20 21 22 23 24 25 26 27 28 {;
   /* Alternative tail of the id list, kept for reference:
      28 29 30 31 32 33 34 35 36 37 38 39 40 */

   if (`jv'==0) {;
      local jvname "";
   };
   else {;
      local jvname "_CF`jv'";
   };

   if (`lreadtxt'==1) {;
       /* NOTE(review): prog_MSM_indiv_infile is defined outside this file. It
          presumably loads the simulated individual panel with the variables
          id, t, ihealth and those used below. TODO confirm. */
       prog_MSM_indiv_infile "${cwd_indiv}/txt_MSM_Indiv`jvname'.txt";
       gen lfpr_eq = lfpr_ft + 0.5 * lfpr_pt;

       /* Accumulate the individual-level data of all ids in one file. The
          data in memory are restored afterwards. */
       preserve;
           gen cfid = `jv';
           compress;
           if (`lvjvc'>0) {;
               append using "${cwd_indiv}/dta_MSM_Indiv_withCF.dta";
           };
           sort cfid id t;
           save "${cwd_indiv}/dta_MSM_Indiv_withCF.dta", replace;
       restore;
   };
   else {;
      use "${cwd_indiv}/dta_MSM_Indiv_withCF.dta" if cfid == `jv', clear;
   };

   /* Retirement age, defined here as the last period in which lfpr equals 1.
      retage is the share of those last periods that falls on each t. */
   preserve;
       keep if lfpr == 1;
       bysort id (t): keep if _n == _N;
       collapse (count) retage = lfpr, by(t);
       egen tot = total(retage);
       replace retage = retage / tot;
       keep t retage;
       gen cfid = `jv';
       if (`lvjvc'>0) {;
           append using "${gdtadir}/dta_MSM_cf_Profiles_retage.dta";
       };
       sort cfid t;
       compress;
       save "${gdtadir}/dta_MSM_cf_Profiles_retage.dta", replace;
   restore;

   gen wage = exp(lnw);
   compress;

   /* Budget analysis: sum every item over all periods of an id, then average
      those sums across ids. The result is one row per counterfactual. */
   preserve;
       sort id t;
       collapse (sum) `lvbudgeta', by(id);
       gen cfid = `jv';
       collapse (mean) `lvbudgeta', by(cfid);
       if (`lvjvc'>0) {;
           append using "${gdtadir}/dta_MSM_cf_budget.dta";
       };
       sort cfid;
       compress;
       save "${gdtadir}/dta_MSM_cf_budget.dta", replace;
   restore;

   /* Copies of lfpr and lnw that are non-missing only for one value of
      ihealth, so that the collapse below yields their means per group. */
   forvalues ijv = 1/4 {;
       gen lfpr`ijv' = lfpr if ihealth==`ijv';
       gen lnw`ijv' = lnw if ihealth==`ijv';
   };

   /* Profiles by t: means, medians, the standard deviation of lnw and the
      number of observations. A median of lnw_fe (p50lnw_fe) used to be part
      of the p50 list and is disabled. */
   collapse (mean) `lvprofiles' lfpr1 lfpr2 lfpr3 lfpr4 lnw1 lnw2 lnw3 lnw4
            (p50) p50A=A p50H=H p50C=C p50I=I p50I_cond=I_cond
                  p50labor=labor p50inc=inc p50lnw=lnw
            (sd) sdlnw=lnw (count) nn=id, by(t);
   sort t;
   /* Fall of the mean lfpr relative to the preceding row. It is missing in
      the first row. */
   gen lfpr_exit = lfpr[_n-1] - lfpr;

   /* Cumulated changes of lnw. In period gvdata0 the change is replaced by
      its _base counterpart. sum() is a running sum, and because of the if
      qualifier nothing before gvdata0 enters it. */
   replace dlnw = dlnw_base if t==${gvdata0};
   gen lnw_fd = sum(dlnw) if t>=${gvdata0};

   replace dlnw_full = dlnw_full_base if t==${gvdata0};
   gen lnw_fd_full = sum(dlnw_full) if t>=${gvdata0};

   /* Gaps between adjacent ihealth groups: 1 minus 2, 2 minus 3, 3 minus 4. */
   foreach iiv in lfpr lnw {;
       gen `iiv'_diff_E2G = `iiv'1 - `iiv'2;
       gen `iiv'_diff_G2B = `iiv'2 - `iiv'3;
       gen `iiv'_diff_B2D = `iiv'3 - `iiv'4;
   };

   gen cfid = `jv';
   if (`lvjvc'>0) {;
      append using "${gdtadir}/dta_MSM_cf_Profiles.dta";
   };
   sort cfid t;
   compress;
   save "${gdtadir}/dta_MSM_cf_Profiles.dta", replace;

   /* Moments. NOTE(review): prog_MSM_moments_infile is defined outside this
      file. According to the original remark it returns variables with the
      suffix _f. A rename that stripped this suffix is disabled. */
   prog_MSM_moments_infile "txt_MSM_Moments`jvname'.txt" "";
   gen byte cfid = `jv';
   if (`lvjvc'>0) {;
      append using "${gdtadir}/dta_MSM_cf_Moments.dta";
   };
   compress;
   save "${gdtadir}/dta_MSM_cf_Moments.dta", replace;

   /* All output files exist from now on, so later passes append to them. */
   local lvjvc = 1;
};

* merge;

/* Put the data moments in front of the simulated moments.
   dta_sipp_Moments.dta is read, tagged with cfid 99, given the same variable
   suffix as the simulated moments, and the simulated moments of all
   counterfactuals are appended. The result replaces dta_MSM_cf_Moments.dta. */
* Moments from the sipp data;
/* Disabled: the do-file that presumably rebuilds dta_sipp_Moments.dta. */
*do do_MSM_infile_sippmoments.do;

use ${gdtadir}/dta_sipp_Moments.dta, clear;
/* col is dropped when present. The error is captured when it is absent. */
cap drop col;
/* 99 marks the rows that come from the data. The simulated rows carry the
   counterfactual id. */
gen byte cfid = 99;
ren *_d *_f;  // to be consistent with the variable names in dta_MSM_cf_Moments.dta;
/* These four moments are removed from the data rows before appending. */
drop lfpr_exit_f irecss_f lfpr1to0_f lfpr0to1_f;

append using ${gdtadir}/dta_MSM_cf_Moments.dta;


* use the profiles which are longer, to 80;
/* Disabled: dropping the listed variables in favour of the longer profiles. */
*drop lfpr lnw sdlnw C;
sort cfid t;
/* Overwrites the file that was just appended. It now also holds the rows
   with cfid 99. */
save ${gdtadir}/dta_MSM_cf_Moments.dta, replace;

/* Final stage: join profiles, moments and the retirement-age distribution on
   cfid and t, store the long file, then turn it into one row per t with one
   set of columns per cfid. */

/* Files involved. */
local lfprofiles "${gdtadir}/dta_MSM_cf_Profiles.dta";
local lfmoments  "${gdtadir}/dta_MSM_cf_Moments.dta";
local lfretage   "${gdtadir}/dta_MSM_cf_Profiles_retage.dta";
local lflong     "${gdtadir}/dta_MSM_cf_Moments_long.dta";

/* Every profile variable taken from the profiles file. */
local lvprofilesall `lvprofiles' `lvprofiles_diff' `lvprofiles_p50' lfpr_exit lnw_fd lnw_fd_full sdlnw nn;

/* Moment variables that are spread over the cfid columns by the reshape. */
local lvmoments_f lfpr_f lnw_f sdlnw_f lnw_fd_f alfpr1to0_f alfpr0to1_f C_f ssa_f
                  lfpr_diff_E2G_f lfpr_diff_G2B_f lfpr_diff_B2D_f lfpr_pt_f plnw41_f;

/* Profiles of the baseline and the counterfactuals, joined with the moments.
   The match result is tabulated in the log before the indicator is dropped.
   A pause at that point is disabled. */
use cfid t `lvprofilesall' using `lfprofiles', clear;
sort cfid t;
merge 1:1 cfid t using `lfmoments';
tab _merge;
drop _merge;

/* Add the retirement-age shares. */
sort cfid;
merge 1:1 cfid t using `lfretage', nogenerate;
compress;

/* Long layout: one row per cfid and t. */
sort cfid t;
save `lflong', replace;

/* Wide layout: one row per t. It replaces the moments file. Dropping the
   moment variables with suffix _f beforehand is disabled. */
reshape wide `lvprofilesall' retage `lvmoments_f', i(t) j(cfid);
compress;
sort t;
save `lfmoments', replace;


cap log close;




